# Reads MCMV data pre-organized by *local*  electoral period
# Merges it all, and then reads and merges in electoral data
# There is an equivalent code for presidential electoral periods


## Assumes data files are in subfolder "Data"

# Data files invoked:
# aux_mun_code5570.Rda: from CEM
# populacaomunicipio.csv: Official population data from basededados.org
# pib_percapita.csv: from IPEA (Felix lopez)
# Base de Dados Deficit Habitacional Municipal 2010.xlsx: from Fundação Dom Cabral
# Composicao_RMs_RIDEs_AglomUrbanas_2010_07_31.xls: from IBGE
# out-electoraldata-local.RData: processed electoral data publicly available from TSE
# out-mayorpartybyyear.RData: processed electoral data publicly available from TSE
# out-MCMV-munelecperiodlocal-mdr.RData: processed data obtained by freedom of information request to MDR
# out-firsttreated-ep-local.RData: processed data obtained by freedom of information request to MDR
# out-treatyears-local.RData: processed data obtained by freedom of information request to MDR

rm(list=ls())
library(plyr)
library(tidyverse)
library(readxl)
library(reshape)

# (1) Start with all municipalities 
# Create a dataset by electoral period (stacked)
load("DataPrivate/aux_mun_code5570.Rda")
mun_ids <- subset(mun_ids,select=c("uf_sigla",
                                   "NOME.DO.MUNICÍPIO",
                                   "Cod_TSE_5",
                                   "Cod_IBGE"))
names(mun_ids)[1]<- "uf"
names(mun_ids)[2]<- "mun"
mun_ids$reg <- factor(substr(mun_ids$Cod_IBGE,1,1))
d <- rbind(data.frame(mun_ids,elec.period=0),
      data.frame(mun_ids,elec.period=1),
      data.frame(mun_ids,elec.period=2),
      data.frame(mun_ids,elec.period=3))

# (2) Merge population and other municipal characteristics 
# (2a) Here we will have one baseline population value that is fixed (pop.2008)
# And another measured at the start of each period (populacao)
pop0 <- read_csv("DataPrivate/populacaomunicipio.csv") 
pop <- subset(pop0,ano==2008,select=c(id_municipio,populacao))
pop$Cod_IBGE <- as.character(pop$id_municipio)
names(pop)[2] <- "pop.2008"
pop$id_municipio <- NULL
d <- merge(d,pop,by="Cod_IBGE",all.x=T)
  
#But we will also have population at the start of the electoral period
pop <- subset(pop0,is.element(ano,c(2005,2009,2013,2017)),select=c(id_municipio,populacao,ano))
pop <- na.omit(pop)
pop$elec.period <- ifelse(pop$ano==2005,0,
                          ifelse(pop$ano==2009,1,
                          ifelse(pop$ano==2013,2,3)))
pop$Cod_IBGE <- as.character(pop$id_municipio)
pop$ano <- pop$id_municipio <- NULL
names(pop)[1]<-"pop"
d <- merge(d,pop,by=c("Cod_IBGE","elec.period"),all.x=T)

# (2b) Read GDP per capita data
gdp <- read.csv2("DataPrivate/pib_percapita.csv")
gdp$elec.period <- ifelse(gdp$ano==2005,0,
                          ifelse(gdp$ano==2009,1,
                                 ifelse(gdp$ano==2013,2,
                                        ifelse(gdp$ano==2017,3,NA))))
gdp <- na.omit(subset(gdp,select=c(id_municipio,elec.period,pib_pc)))
d <- merge(d,gdp,by.x=c("Cod_IBGE","elec.period")
                ,by.y=c("id_municipio","elec.period"),all.x=T)

# (2c) housing deficit
hd <- read_excel("DataPrivate/Base de Dados Deficit Habitacional Municipal 2010.xlsx",
                 sheet=1,skip=6)
names(hd)[1] <- 'NOME_NIVEL'
hd <- subset(hd,NOME_NIVEL=='MUNICÍPIO',
             select=c(codigo3,DHTT01,DHTT02))
names(hd)[2:3]  <- c("hdtot.2010","hdtotrel.2010")
hd$Cod_IBGE <- gsub("\\D","",hd$codigo3)
hd$codigo3 <- NULL
d <- merge(d,hd,by=c("Cod_IBGE"),all.x=T)

# (2d) municipalities that were in Metropolitan regions as of 2009
mr <- read_excel("DataPrivate/Composicao_RMs_RIDEs_AglomUrbanas_2010_07_31.xls",
                 sheet=1,n_max=697)
names(mr)[3] <- "Cod_IBGE"
mr<- subset(mr,Tipo=="RM",select=c(Cod_IBGE,Tipo))
mr$Tipo <- mr$Tipo=="RM"
names(mr)[2] <- "mr.2010"
d <- merge(d,mr,by="Cod_IBGE",all.x=T)
d$mr.2010[which(is.na(d$mr.2010))]<-F

# (2e) get period of first treatment
load("DataPrivate/out-firsttreated-ep-local.RData")
d <- merge(d,ft, by="Cod_IBGE",all.x=T)

# (2f) get years in which houses were built in each municipality and compute treatment-years in each cycle
# zero indicates treatment in the year of the election (excluding those after election)
# one is in the year before the election, etc...
load("DataPrivate/out-treatyears-local.RData")
d <- merge(d,years.treated, by=c("Cod_IBGE","elec.period"),all.x=T)

# (3) Get the electoral data prepared with Data_ReadElectoral.R
# We have term limits status for all, but we don't 
# have incumbents for all municipalities
load("DataPrivate/out-electoraldata-local.RData") 
cat(comment(elect.data))

# Merge term limits
d <- merge(d,elect.data$etl,by=c("Cod_TSE_5","elec.period"),all.x=T)
# Merge incumbent performance
cat("Elections with identified incumbent candidates")
table(elect.data$all.e$elec.period)
d <- merge(d,elect.data$all.e,by=c("Cod_TSE_5","elec.period"),all.x=T)
table(is.na(d$inc.vs),d$elec.period) #check (should match previous table)

# Create a variable with the pre-treatment (2008) incumbent vote share
tmp.2008 <- subset(d,elec.period==0,select=c(Cod_IBGE,inc.vs))
names(tmp.2008)[2] <- "inc.vs.2008"
d <- merge(d,tmp.2008,by=c("Cod_IBGE"),all.x=T)
rm(tmp.2008)

#Merge MCMV individual contracts with MCMV constructin contracts
# There are four different sources of MCMV beneficiary data
# (a) Our original LAI request (separate FAR and PNHR figures, but no OP, through mid 2017)
# (b) Laisas data, which has figures for OP and FAR together (through 2017)
# (c) Our recent Caixa LAI request (only FAR, through 2021)
# (d*) Ou rmost recent MDR LAI request (OP and all others together, through 2021)
#load("out-MCMV-munelecperiod.RData")#dmunep; original LAI request
#load("out-MCMV-munelecperiod-laisa.RData")#dlmunep; Laisa and Cecilia FAR+OP data
#load("out-MCMV-munelecperiod-new.RData")#dnmunep; Second LAI request (FAR only)
load("DataPrivate/out-MCMV-munelecperiodlocal-mdr.RData")#dnnmunep; MDR LAI request (FAR+FDS+PNHR and OP)
cat(comment(dnnmunep))
d <- merge(d,dnnmunep[,-c(2,3)],by=c("Cod_IBGE","elec.period"),all.x=T)
# Turn NAs into 0s (municipalities without MCMV in the period)
d$Nop.mdr[which(is.na(d$Nop.mdr))]<-0
d$Nfar.mdr[which(is.na(d$Nfar.mdr))]<-0
d$Npnhr.mdr[which(is.na(d$Npnhr.mdr))]<-0
# Compute the aggregate of interest to us
d$N <- d$Nop.mdr+d$Nfar.mdr+d$Npnhr.mdr

# Compute cummlative MCMV (over periods) by municipality
  tmp <- subset(d,select=c(Cod_IBGE,elec.period,N))
  tmpN <- ddply(tmp,.(Cod_IBGE),function(x){cumsum(x$N)}) 
  tmpN <- melt(tmpN,id.vars="Cod_IBGE")
  names(tmpN)<-c("Cod_IBGE","elec.period","cN")
  tmpN$elec.period <- as.numeric(gsub("\\D","",tmpN$elec.period))-1
d <- merge(d,tmpN,by=c("Cod_IBGE","elec.period"),all.x=T)

# Cleanup
d$years.treated <- NULL

comment(d) <- paste("Data_Assemble_ElectoralPeriod.R on",Sys.time(), 
                    "\nN = beneficiaries in period",
                    "\ncN = cummulative beneficiaires (over periods)\n.")
save(d, file="Data/dataset-mcmv-by-localelectoral-period.RData")
